##Heatmap
library(dplyr)
library(tidyr)
library(pheatmap)
library(openxlsx)
##Pull data for analysis
Liver_raw<-read.xlsx("../1_Input/2_Protein/Liver - PN-0064-1 Liver - mep.xlsx", colNames = T, rowNames = F, sheet = "Cleaned_NoMVs")
Liver_raw$Gene.Symbol<-make.unique(Liver_raw$Gene.Symbol, sep = ".")
Liver_raw.cleaned<-Liver_raw %>% mutate(Gene.Symbol = strsplit(as.character(Gene.Symbol), split = ";")) %>% unnest(Gene.Symbol)
Liver_raw.cleaned$Gene.Symbol<-make.unique(Liver_raw.cleaned$Gene.Symbol, sep = ".")
Liver_raw.complete.cleaned<-Liver_raw.cleaned[!is.na(Liver_raw.cleaned$Gene.Symbol),]
LFQs.Liver_raw<-dplyr::select(Liver_raw.complete.cleaned, contains("LFQ"))
rownames(LFQs.Liver_raw)<-Liver_raw.complete.cleaned$Gene.Symbol
Once we established that the populations under consideration truly display divergene expression patterns, we sought to determine whether unbiased global gene expression patterns recapitulate the described phenotypes within each Liver failure group. To accomplish this, an unsupervised Principal Components Analysis (PCA) was initially used with normalized counts.
Before running the principal components analysis, it was necessary to first determine the number of PC’s required to account for 80% of the variance, a machine-learning algorithmm benchmark that provides sufficient confidence in the analysis.
#Plot Features of the PCA
library(readxl)
library(dplyr)
library(plotly)
#transpose the dataset (required for PCA)
data.pca<-t(LFQs.Liver_raw)
data.pca<-as.data.frame(data.pca)
##Import the data to be used for annotation
Index<-c(0,0,0,0,1,1,1,1)
Index<-as.data.frame(Index)
##merge the file
data.pca_Final<-cbind(Index, data.pca)
rownames(data.pca_Final)<-data.pca_Final$Row.names
pca.comp<-prcomp(data.pca_Final[,(ncol(Index)+2):ncol(data.pca_Final)])
pcaCharts=function(x) {
x.var <- x$sdev ^ 2
x.pvar <- x.var/sum(x.var)
par(mfrow=c(2,2))
plot(x.pvar,xlab="Principal component",
ylab="Proportion of variance", ylim=c(0,1), type='b')
plot(cumsum(x.pvar),xlab="Principal component",
ylab="Cumulative Proportion of variance",
ylim=c(0,1),
type='b')
screeplot(x)
screeplot(x,type="l")
par(mfrow=c(1,1))
}
pcaCharts(pca.comp)
png(file=paste0("../2_Output/2_Protein/Proteomics_PCA.Charts.png"))
pcaCharts(pca.comp)
dev.off()
## quartz_off_screen
## 2
From the previous calculations, it is appears that 3 principal components are necessary (accounting for >80% cumulative variance).
##Create a 3D-PCA for Inspection
library(plotly)
##Index
PCs<-cbind(pca.comp$x, Index)
rownames(PCs)<-rownames(data.pca)
ax_text<-list(
family = "times",
size = 12,
color = "black")
t <- list(
family = "times",
size = 14,
color = "black")
pca <- plot_ly(PCs, x = ~PC1, y = ~PC2, z = ~PC3,
marker = list(color = ~Index,
colorscale = c('#FFE1A1', '#683531'),
showscale = TRUE),
text=rownames(PCs)) %>%
add_markers() %>%
layout(scene = list(
xaxis = list(title = 'PC1', zerolinewidth = 4,
zerolinecolor="darkgrey", linecolor="darkgrey",
linewidth=4, titlefont=t, tickfont=ax_text),
yaxis = list(title = 'PC2', zerolinewidth = 4,
zerolinecolor="darkgrey", linecolor="darkgrey",
linewidth=4, titlefont=t, tickfont=ax_text),
zaxis = list(title = 'PC3', zerolinewidth = 4,
zerolinecolor="darkgrey", linecolor="darkgrey",
linewidth=4, titlefont=t, tickfont=ax_text)),
annotations = list(
x = 1.13,
y = 1.03,
text = 'Diabetes',
xref = '1',
yref = '0',
showarrow = FALSE))
pca #must comment out for PDF generation via knitr (Pandoc)
library(pheatmap)
library(dplyr)
#Filter the data
Results_HM<-dplyr::filter(Liver_raw.complete.cleaned, minuslog_pval>2)
HM_data.p05<-data.matrix(dplyr::select(Results_HM, contains("LFQ")))
rownames(HM_data.p05)<-Results_HM$Gene.Symbol
#Import the Index File
paletteLength <- 100
myColor <- colorRampPalette(c("dodgerblue4", "white", "brown4"))(paletteLength)
pheatmap(HM_data.p05, color = myColor, scale = "row")
Heatmap and Unsupervised Hierarchical Clustering of Proteomics in Adipor1 knockout relative to wild-type.
pheatmap(HM_data.p05, color = myColor, scale = "row", filename = "../2_Output/2_Protein/Heatmap.Liver_LFQs.p01.pdf")
##Volcano Plot
##Load Protein Differential Expression
##Volcano Plot
# Load packages
library(dplyr)
library(ggplot2)
library(ggrepel)
# Read data from the web
results<-read.xlsx("../1_Input/2_Protein/Liver - PN-0064-1 Liver - mep.xlsx", sheet = "IPA_Import")
results = mutate(results, sig=ifelse(results$minuslog_pval>1.3 & abs(results$log2FC)>0.585, "p < 0.05 and |FC| > 1.5", "Not Sig"))
results<-dplyr::arrange(results, desc(sig), desc(abs(log2FC)))
LABEL=results$Gene[1:20]
#plot the ggplot
p = ggplot(results, aes(log2FC, minuslog_pval)) + theme(panel.background = element_rect("white", colour = "black", size=2), panel.grid.major = element_line(colour = "gray50", size=.75), panel.grid.minor = element_line(colour = "gray50", size=0.4)) +
geom_point(aes(fill=sig), colour="black", shape=21) + labs(x=expression(Log[2](Fold-Change)), y=expression(-Log[10](P-value))) + xlim(-5,5)+ geom_hline(yintercept = 0, size = 1) + geom_vline(xintercept=0, size=1)+
scale_fill_manual(values=c("black", "tomato"))
#add a repelling effect to the text labels.
p+geom_text_repel(data=filter(results, minuslog_pval>6 & abs(log2FC) > 1 | abs(log2FC) > 3 & minuslog_pval>2), aes(label=Gene))
pdf(file = "../2_Output/2_Protein/Volcano.Plot_Liver.pdf", width = 5.3, height = 4.5)
p+geom_text_repel(data=filter(results, minuslog_pval>6 & abs(log2FC) > 1 | abs(log2FC) > 3 & minuslog_pval>2), aes(label=Gene))
dev.off()
## quartz_off_screen
## 2